*** LIS Cross-National Data Center in Luxembourg

* email: usersupport@lisdatacenter.org 

*** LIS Self Teaching Package 2022

*** Part I: Inequality, poverty, and social policy
*** Stata version

* last change of this version of the syntax: 15-01-2022.


** Exercise 9: Producing graphs

global varshh "dname dhi hwgt nhhmem" 
global datasets "gt06 us04 dk04" 

program define make_data 
foreach ccyy in $datasets { 
	use $varshh using $`ccyy'h, clear 
	if "`ccyy'" != "gt06" { 
		append using ${mydata}exercise-part1
	} 
	save ${mydata}exercise-part1, replace 
} 
end

program define make_variables
encode dname, gen (ctry)
gen miss_comp = 0
quietly replace miss_comp=1 if dhi==.
quietly drop if miss_comp==1
* select only records if dhi filled 
drop if dhi==. 
* recode negative dhi into zero
cap drop dhi_tb
gen dhi_tb=dhi
replace dhi_tb=0 if dhi<0

* Apply top and bottom codes / outlier detection
gen dhi_log=log(dhi_tb) 
* keep negatives and 0 in the overall distribution of non-missing dhi 
replace dhi_log=0 if dhi_log==. & dhi_tb!=.  
* detect interquartile range 
qui sum dhi_log [w=hwgt],de 
gen iqr=r(p75)-r(p25) 
* detect upper bound for extreme values 
gen upper_bound=r(p75) + (iqr * 3) 
gen lower_bound=r(p25) - (iqr * 3) 
* top code income at upper bound for extreme values 
replace dhi_tb=exp(upper_bound) if dhi_tb>exp(upper_bound)  
* bottom code income at lower bound for extreme values 
replace dhi_tb=exp(lower_bound) if dhi_tb<exp(lower_bound)  

cap drop edhi_tb
gen edhi_tb = dhi_tb/(nhhmem^0.5)
end

* Graph for Lorenz Curve
quietly make_data 
use dname dhi hwgt nhhmem using ${mydata}exercise-part1, clear
quietly make_variables
lorenz estimate edhi_tb [w=hwgt*nhhmem], over (ctry)
lorenz graph, aspectratio(1) xlabel (, grid) overlay
graphexportpdf $mypdf/graph-lorenz
